library(tidyverse)
library(WDI)
library(vtree)
library(ggrepel)
# Optional helpers for looking up indicator codes (left disabled):
# new_cache <- WDIcache()
# WDIsearch(string = "Access to electricity", field = "name", short = TRUE, cache = new_cache)

# World Bank indicator codes, named with the column names used below.
wb_indicators <- c(
  "electricity_access" = "EG.ELC.ACCS.ZS",
  "national_pov" = "SI.POV.NAHC",
  "mort_att_water" = "SH.STA.WASH.P5"
)

# Download the indicators for all countries. `extra = TRUE` is requested
# because later steps rely on the `region` column.
# NOTE(review): the WDI documentation says `latest` overrides `start`/`end`
# when it is set, so years after 2020 may be returned -- confirm.
wb_data <- WDI(
  country = "all",
  indicator = wb_indicators,
  start = 1960,
  end = 2020,
  extra = TRUE,
  # cache = new_cache,
  latest = 100, # number of most recent non-NA values to get
  language = "en"
)

Access to electricity

# vtree ----
# Variable tree for 2019: regions at the first level, and below each region
# the countries whose electricity access is under 20. Countries at or above
# that threshold share one placeholder value, which is pruned from the tree.
wb_data %>%
  filter(!is.na(region)) %>%
  filter(region != "Aggregates") %>% # drop aggregate rows
  filter(year == 2019) %>%
  mutate(
    less_than = ifelse(
      electricity_access < 20,
      country,
      "More than or equal to"
    )
  ) %>%
  vtree(
    "region less_than",
    title = "**Access to electricity (% of population)** \n\n**World** \nNumber of countries",
    summary = "electricity_access \nAccess to electricity\n%mean% (%)",
    prune = list(less_than = c("More than or equal to")),
    labelvar = c(region = "Region"),
    showvarnames = FALSE,
    showpct = FALSE,
    showcount = FALSE,
    horiz = FALSE
  )

Poverty vs access to electricity

There is a negative relationship between access to electricity and poverty. For certain years, South Africa and Ukraine stand out with high levels in both indicators.

# Scatter of national poverty against electricity access for every
# country-year. Observations with access above 75 and poverty above 60 get
# a different colour and a "country-year" label; the legend is hidden.
wb_data %>%
  ggplot(mapping = aes(x = electricity_access, y = national_pov)) +
  geom_point(
    mapping = aes(color = electricity_access > 75 & national_pov > 60)
  ) +
  geom_label(
    mapping = aes(
      label = if_else(
        electricity_access > 75 & national_pov > 60,
        paste0(country, "-", year),
        NA_character_
      )
    ),
    size = 2
  ) +
  theme(legend.position = "none")

Have a look at recent years only.

# Same scatter restricted to 2015 onwards. Observations with access in
# [75, 100] and poverty in [40, 60] are highlighted and labelled with
# non-overlapping "country-year" labels.
wb_data %>%
  filter(year >= 2015) %>%
  ggplot(mapping = aes(x = electricity_access, y = national_pov)) +
  geom_point(
    mapping = aes(
      color = between(electricity_access, 75, 100) & between(national_pov, 40, 60)
    )
  ) +
  geom_label_repel(
    mapping = aes(
      label = if_else(
        between(electricity_access, 75, 100) & between(national_pov, 40, 60),
        paste0(country, "-", year),
        NA_character_
      )
    ),
    size = 2
  ) +
  theme(legend.position = "none")

Analyze Afghanistan, Honduras and Mexico. The increase in access to electricity in Afghanistan is very interesting. Honduras, on the other hand, sometimes experiences decreases in access to electricity.

# Poverty against electricity access for three selected countries, with
# every observation labelled "country-year".
focus_countries <- c("Afghanistan", "Honduras", "Mexico")

wb_data %>%
  filter(country %in% focus_countries) %>%
  ggplot(mapping = aes(x = electricity_access, y = national_pov)) +
  geom_point() +
  geom_label_repel(
    mapping = aes(label = paste0(country, "-", year)),
    size = 2
  ) +
  theme(legend.position = "none")

Countries having electricity access less than 50 percent in any year

# Names of the countries (aggregates excluded) that recorded electricity
# access below 50 in at least one year, in order of first appearance.
elec_less_than_50 <-
  wb_data %>%
  filter(
    !is.na(region),
    region != "Aggregates", # filter out aggregates
    electricity_access < 50
  ) %>%
  distinct(country) %>%
  pull(country)

In the data, there are a total of 59 countries that had less than 50 percent electricity access.

Early improvers. Increase to over 80 percent before 2010.

# Electricity access over time for the countries in `elec_less_than_50`.
# Only observations above 80 recorded before 2010 are labelled.
wb_data %>%
  filter(country %in% elec_less_than_50) %>%
  ggplot(mapping = aes(x = year, y = electricity_access)) +
  geom_point() +
  geom_label_repel(
    mapping = aes(
      label = if_else(
        electricity_access > 80 & year < 2010,
        paste0(country, "-", year),
        NA_character_
      )
    )
  )

These countries are Indonesia and Morocco. Let’s see how they evolved.

# Yearly electricity access for Indonesia and Morocco as points, each
# labelled with the country name and the value rounded to one decimal.
early_improvers <- c("Indonesia", "Morocco")

wb_data %>%
  filter(country %in% early_improvers) %>%
  ggplot(mapping = aes(x = year, y = electricity_access)) +
  geom_point() +
  geom_label_repel(
    mapping = aes(label = paste0(country, "-", round(electricity_access, 1)))
  )

This is not a good way to visualize the data. Let’s use geom_line instead. Sudden increases and decreases could be related to data quality issues.

# Electricity access over time for Indonesia and Morocco, one coloured
# line per country.
early_improvers <- c("Indonesia", "Morocco")

wb_data %>%
  filter(country %in% early_improvers) %>%
  ggplot(mapping = aes(x = year, y = electricity_access, color = country)) +
  geom_line()

All countries with values below 50 percent.

# One line per country in `elec_less_than_50`. The legend is hidden
# because every country gets its own colour.
below_50_data <- filter(wb_data, country %in% elec_less_than_50)

ggplot(
  data = below_50_data,
  mapping = aes(x = year, y = electricity_access, color = country)
) +
  geom_line() +
  theme(legend.position = "none")

Sub-Saharan Africa: all countries with values below 50 percent.

# Same line plot restricted to Sub-Saharan Africa, with the x axis limited
# to 1990-2020 and the legend hidden.
wb_data %>%
  filter(
    country %in% elec_less_than_50,
    region == "Sub-Saharan Africa"
  ) %>%
  ggplot(mapping = aes(x = year, y = electricity_access, color = country)) +
  geom_line() +
  scale_x_continuous(limits = c(1990, 2020)) +
  theme(legend.position = "none")

Which of these countries increased to 60 percent or more?

# Sub-Saharan countries from `elec_less_than_50` that also recorded
# electricity access of 60 or more in at least one year.
improved_sub_sahara_c <-
  wb_data %>%
  filter(
    country %in% elec_less_than_50,
    region == "Sub-Saharan Africa",
    electricity_access >= 60
  ) %>%
  distinct(country) %>%
  pull(country)
# Line plot of the improved Sub-Saharan countries, with each line labelled
# once at its last drawn observation.
#
# The label used to be attached to the rows where `year` equals the maximum
# year of the whole filtered data set. That row can lie outside the x-axis
# limits or have a missing `electricity_access` value, in which case the
# label is dropped from the plot. The label year is now chosen per country,
# among the rows that are actually drawn.
plot_years <- c(1990, 2020)

wb_data %>%
  filter(country %in% improved_sub_sahara_c) %>%
  group_by(country) %>%
  mutate(
    # Rows that survive both the NA removal and the x-axis limits.
    is_drawn = !is.na(electricity_access) &
      between(year, plot_years[1], plot_years[2]),
    # `max(..., -Inf)` avoids a warning when a country has no drawn rows;
    # such a country simply gets no label.
    label = if_else(
      is_drawn & year == max(year[is_drawn], -Inf),
      as.character(country),
      NA_character_
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(x = year, y = electricity_access, color = country)) +
  geom_line() +
  geom_label_repel(aes(label = label)) +
  scale_x_continuous(limits = plot_years) +
  theme(legend.position = "bottom")

Compare minimums and maximums.

# The five countries (aggregates excluded) with the largest spread between
# their lowest and highest recorded electricity access. The spread is
# rounded to one decimal before ranking.
most_improve_5_c <-
  wb_data %>%
  filter(
    !is.na(region),
    region != "Aggregates",
    !is.na(electricity_access)
  ) %>%
  group_by(country) %>%
  summarise(
    range_elec = round(max(electricity_access) - min(electricity_access), 1)
  ) %>%
  arrange(desc(range_elec)) %>%
  slice_head(n = 5) %>%
  pull(country)
# Electricity access over time for the countries in `most_improve_5_c`.
most_improved_data <- filter(wb_data, country %in% most_improve_5_c)

ggplot(
  data = most_improved_data,
  mapping = aes(x = year, y = electricity_access, color = country)
) +
  geom_line()

Mortality rate attributed to unsafe water

# Every `mort_att_water` observation plotted against its year.
ggplot(data = wb_data, mapping = aes(x = year, y = mort_att_water)) +
  geom_point()

# Column chart of `mort_att_water` by country, using only the rows where
# the indicator is present.
mortality_rows <- filter(wb_data, !is.na(mort_att_water))

ggplot(data = mortality_rows, mapping = aes(x = country, y = mort_att_water)) +
  geom_col()

# Column chart of countries (aggregates excluded) with `mort_att_water` of
# 25 or more, filled by region. A country is labelled when it is in the
# Middle East & North Africa region or its value is 50 or more.
wb_data %>%
  filter(
    !is.na(region),
    region != "Aggregates",
    !is.na(mort_att_water),
    mort_att_water >= 25
  ) %>%
  ggplot(mapping = aes(x = country, y = mort_att_water, fill = region)) +
  geom_col() +
  geom_label_repel(
    mapping = aes(
      label = ifelse(
        region == "Middle East & North Africa" | mort_att_water >= 50,
        country,
        NA
      )
    )
  ) +
  theme(legend.position = "bottom")